# -*- coding: utf-8 -*-
import scrapy
from scrapy.http import FormRequest
import json
from ..items import JwcSpiderItem


class JcwsSpider(scrapy.Spider):
    """Crawl 'rcjy' (daily trading / 日常经营) category announcements listed on
    cninfo.com.cn for 2019-01-01 ~ 2020-01-01, download each announcement PDF,
    and yield it as a ``JwcSpiderItem`` (name, code, pdf_id, pdf_content).
    """

    name = 'jcws'
    # No trailing slash: Scrapy's OffsiteMiddleware compares against bare
    # domain names, so 'cninfo.com.cn/' would never match and every request
    # to the site could be dropped as offsite.
    allowed_domains = ['cninfo.com.cn']

    def start_requests(self):
        """POST one paged query (pages 1-29, 30 rows each) to the history
        announcement endpoint; responses go to :meth:`parse`.
        """
        url = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
        for page in range(1, 30):
            # Form payload expected by the cninfo query API; only pageNum
            # varies between requests.
            data = {
                "pageNum": str(page),
                "pageSize": "30",
                "column": "szse",
                "tabName": "fulltext",
                "plate": "szmb;shmb",
                "stock": "",
                "searchkey": "",
                "secid": "",
                "category": "category_rcjy_szsh",
                "trade": "",
                "seDate": "2019-01-01~2020-01-01",
                "sortName": "",
                "sortType": "",
                "isHLtitle": "true",
            }
            yield FormRequest(url=url, formdata=data, callback=self.parse)

    def parse(self, response):
        """Parse one JSON listing page and schedule a PDF download per
        announcement, carrying identifying fields along in ``meta``.
        """
        content = json.loads(response.text)
        # The API returns "announcements": null (not []) when a page is past
        # the last result — treat that as an empty page instead of crashing.
        for da in content.get("announcements") or []:
            pid = da["announcementId"]
            pdf_url = "http://www.cninfo.com.cn/new/announcement/download?bulletinId=%s" % pid
            # dont_filter: the download endpoint differs only by query string,
            # so keep the dupe filter out of the way.
            yield scrapy.Request(
                url=pdf_url,
                meta={"name": da["secName"], "code": da["secCode"], "pid": pid},
                callback=self.parse_to,
                dont_filter=True,
            )

    def parse_to(self, response):
        """Wrap the downloaded PDF bytes plus the metadata attached in
        :meth:`parse` into a ``JwcSpiderItem``.
        """
        meta = response.meta
        yield JwcSpiderItem(
            name=meta["name"],
            code=meta["code"],
            pdf_id=meta["pid"],
            pdf_content=response.body,
        )



